{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "## **LangChain Doc Summarizer**"
      ],
      "metadata": {
        "id": "C30t7RVa0huh"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Install the necessary packages"
      ],
      "metadata": {
        "id": "kzmrzSEA1bmt"
      }
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "Y4j13SNO0MF1"
      },
      "outputs": [],
      "source": [
        "# Install all dependencies in a single resolver pass; %pip targets the running kernel's environment.\n",
        "# NOTE(review): versions are unpinned. app.py relies on legacy `langchain.*` import paths, so\n",
        "# consider pinning known-good releases once they have been confirmed.\n",
        "%pip install -q langchain openai streamlit tiktoken chromadb pypdf pycryptodome"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "Create a Streamlit app and save to a local file"
      ],
      "metadata": {
        "id": "l3Dj8b_D1ZNy"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%%writefile app.py\n",
        "# Streamlit app: summarize an uploaded PDF with a LangChain summarize chain backed by OpenAI.\n",
        "import os\n",
        "import tempfile\n",
        "\n",
        "import streamlit as st\n",
        "from langchain.chains.summarize import load_summarize_chain\n",
        "from langchain.document_loaders import PyPDFLoader\n",
        "from langchain.embeddings.openai import OpenAIEmbeddings\n",
        "from langchain.llms.openai import OpenAI\n",
        "from langchain.prompts import PromptTemplate\n",
        "from langchain.vectorstores.chroma import Chroma\n",
        "\n",
        "# The word limit lives in the prompt itself: the stuff summarize chain only fills the\n",
        "# {text} variable, so extra keyword arguments given to run() would not reach the model.\n",
        "SUMMARY_PROMPT = PromptTemplate(\n",
        "    template=\"Write a summary within 150 words of the following:\\n\\n{text}\\n\\nSUMMARY:\",\n",
        "    input_variables=[\"text\"],\n",
        ")\n",
        "\n",
        "# Streamlit app\n",
        "st.title('LangChain Doc Summarizer')\n",
        "\n",
        "# Get OpenAI API key and source document input\n",
        "openai_api_key = st.text_input(\"OpenAI API Key\", type=\"password\")\n",
        "source_doc = st.file_uploader(\"Upload Source Document\", type=\"pdf\")\n",
        "\n",
        "# Check if the 'Summarize' button is clicked\n",
        "if st.button(\"Summarize\"):\n",
        "    # Validate inputs\n",
        "    if not openai_api_key.strip() or not source_doc:\n",
        "        st.write(\"Please provide the missing fields.\")\n",
        "    else:\n",
        "        try:\n",
        "            # PyPDFLoader is given a file path, so persist the upload to a temporary file first.\n",
        "            tmp_file = tempfile.NamedTemporaryFile(delete=False)\n",
        "            try:\n",
        "                with tmp_file:\n",
        "                    tmp_file.write(source_doc.read())\n",
        "                pages = PyPDFLoader(tmp_file.name).load_and_split()\n",
        "            finally:\n",
        "                # Always remove the temp file, even when writing or PDF parsing fails.\n",
        "                os.remove(tmp_file.name)\n",
        "\n",
        "            if not pages:\n",
        "                # Nothing to embed or summarize (for example, a PDF without extractable text).\n",
        "                st.write(\"No text could be extracted from the uploaded document.\")\n",
        "            else:\n",
        "                # Create embeddings for the pages and insert into Chroma database\n",
        "                embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)\n",
        "                vectordb = Chroma.from_documents(pages, embeddings)\n",
        "\n",
        "                # Initialize the OpenAI module, load and run the summarize chain\n",
        "                llm = OpenAI(temperature=0, openai_api_key=openai_api_key)\n",
        "                chain = load_summarize_chain(llm, chain_type=\"stuff\", prompt=SUMMARY_PROMPT)\n",
        "                # NOTE(review): only the chunks returned for this blank query are summarized,\n",
        "                # which may not cover the whole document - confirm this is intended.\n",
        "                search = vectordb.similarity_search(\" \")\n",
        "                summary = chain.run(input_documents=search)\n",
        "\n",
        "                st.write(summary)\n",
        "        except Exception as e:\n",
        "            st.write(f\"An error occurred: {e}\")"
      ],
      "metadata": {
        "id": "deojXC0J03fQ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Install localtunnel to expose the locally running Streamlit app through a public URL"
      ],
      "metadata": {
        "id": "52UihvA71Wrf"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Install the localtunnel npm package locally; a later cell invokes it through npx.\n",
        "!npm install localtunnel"
      ],
      "metadata": {
        "id": "6ECSPDUi1CjD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Run the Streamlit app in the background"
      ],
      "metadata": {
        "id": "xlh9g9YY1VnI"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Start Streamlit in the background with stdout and stderr captured in logs.txt.\n",
        "# The port is pinned so it matches the tunnel opened on port 8501 in the next code cell.\n",
        "!streamlit run app.py --server.port 8501 > logs.txt 2>&1 &"
      ],
      "metadata": {
        "id": "cRC_OBlV1Fe8"
      },
      "execution_count": 4,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "Expose the Streamlit app on port 8501"
      ],
      "metadata": {
        "id": "LaunU6BF1Hxt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Open a localtunnel tunnel to local port 8501, the port the Streamlit app is expected to listen on.\n",
        "!npx localtunnel --port 8501"
      ],
      "metadata": {
        "id": "7RhfyDs01RlU"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}